## Replication code for: Katsumata, Hiroto and Shunya Noda. 2023. 
## "'Kick Them Out' as a Voting Strategy: Theory and Evidence from Multi-member District Elections." 
## The Journal of Politics, forthcoming.

## Analysis of the survey data from the 2001 and 2010 General Elections in Japan
## Author: Hiroto Katsumata
## Date: May, 2023

## Make sure the tidyverse meta-package is not attached before the packages
## are loaded below.
## Detach only when it is actually on the search path: an unconditional
## detach() raises an error in that case, which stops the script when it is
## run with Rscript or source().
if ("package:tidyverse" %in% search()) {
  detach("package:tidyverse", unload = TRUE, character.only = TRUE)
}

## Load packages
library(MASS)
library(Hmisc)
library(tidyverse)

## Initial setting
## Keep character data as character (not factor) when data frames are built
options(stringsAsFactors = FALSE)

## Load functions
## Run the project's helper script.
## NOTE(review): summary_j(), ame() and amedif(), which are called further
##  down in this script, are presumably defined there -- confirm
source("functions/kickout_functions.R")


## Data wrangling ====================

## Read the JES3 survey file and keep only the rows with WAVE_A == 1
data0 <- filter(read.csv("data/jes3.csv"), WAVE_A == 1)

## Replace the codes 88, 99, 888 and 999 with NA, one code at a time
## NOTE(review): the replacement is applied to every column of the file,
##  not only to selected survey items -- confirm that no column holds
##  these numbers as genuine values
for (missing_code in c(88, 99, 888, 999)) {
  data0[data0 == missing_code] <- NA
}

## Read candidate code
## Collapse the code sheet to one row per prefecture (Pref).
## For each of the four candidate columns, the inner product of the
##  "entry is not missing" indicator with Code adds up the Code values of the
##  rows that have an entry in that column (0 when no row has one).
code <- read.csv("data/Originalcodejes3_1.csv")
code <- group_by(code, Pref)
code <- summarise(code,
                  LDP_1 = as.vector((!is.na(LDP_1)) %*% Code),
                  LDP_2 = as.vector((!is.na(LDP_2)) %*% Code),
                  DPJ_1 = as.vector((!is.na(DPJ_1)) %*% Code),
                  DPJ_2 = as.vector((!is.na(DPJ_2)) %*% Code))
code <- ungroup(code)

## Merge and make variables
## Naming scheme of the constructed variables:
##  dbl:   the party that runs two candidates in the district
##  sgl:   the party that runs a single candidate in the district
##  pref:  preference
##  vote:  vote for the candidate in the election
##  dblc1: the candidate of the two-candidate party with more votes
##         than the colleague
##  dblc2: the candidate of the two-candidate party with fewer votes
##         than the colleague
##  sglc:  the candidate of the single-candidate party

## Attach the prefecture-level candidate codes and drop respondents whose
##  prefecture has no LDP_1 entry after the join
jes3_merged <- left_join(data0, code, by = c("PREF" = "Pref"))
jes3_merged <- filter(jes3_merged, !is.na(LDP_1))

## The six candidate slots: A1S<k>X1C is compared with the candidate code,
##  A1S<k>X3 is the value that is carried over
jes3_slot_code <- paste0("A1S", 1:6, "X1C")
jes3_slot_pref <- paste0("A1S", 1:6, "X3")

## For each row, return the A1S<k>X3 value of the first slot k (k = 1..6)
##  whose A1S<k>X1C equals `candidate`, and NA when no slot matches.
## The ladder is built from the last slot backwards, so slot 1 has priority.
## NOTE(review): as with a nested ifelse(), a missing A1S<k>X1C yields NA for
##  that row and later slots are not consulted -- confirm this is intended
jes3_candidate_pref <- function(survey, candidate) {
  pref <- NA
  for (k in rev(seq_along(jes3_slot_code))) {
    pref <- ifelse(survey[[jes3_slot_code[k]]] == candidate,
                   survey[[jes3_slot_pref[k]]],
                   pref)
  }
  pref
}

## In prefecture 20 the DPJ columns feed the "dbl" variables and the LDP
##  columns feed the "sgl" variables; in all other prefectures it is the
##  other way round
data <- mutate(
  jes3_merged,
  ## Candidate-level preferences
  prefLDP_1 = jes3_candidate_pref(jes3_merged, LDP_1),
  prefLDP_2 = jes3_candidate_pref(jes3_merged, LDP_2),
  prefDPJ_1 = jes3_candidate_pref(jes3_merged, DPJ_1),
  prefDPJ_2 = jes3_candidate_pref(jes3_merged, DPJ_2),
  ## Covariates
  age = 2001 - BORNY,
  gender = GENDER,
  university = EDUCAT01 == 4,
  ## Preferences by candidate role
  prefdblc1 = ifelse(PREF == 20, prefDPJ_1, prefLDP_1),
  prefdblc2 = ifelse(PREF == 20, prefDPJ_2, prefLDP_2),
  prefsglc = ifelse(PREF == 20, prefLDP_1, prefDPJ_1),
  ## Vote indicators: does B2C equal the candidate's code?
  voteLDP_1 = B2C == LDP_1,
  voteLDP_2 = B2C == LDP_2,
  voteDPJ_1 = B2C == DPJ_1,
  voteDPJ_2 = B2C == DPJ_2,
  votedblc1 = ifelse(PREF == 20, voteDPJ_1, voteLDP_1),
  votedblc2 = ifelse(PREF == 20, voteDPJ_2, voteLDP_2),
  votesglc = ifelse(PREF == 20, voteLDP_1, voteDPJ_1),
  ## Party-level preferences
  prefLDPparty = A5X2,
  prefDPJparty = A5X3,
  prefdblparty = ifelse(PREF == 20, prefDPJparty, prefLDPparty),
  prefsglparty = ifelse(PREF == 20, prefLDPparty, prefDPJparty),
  ## Party identification
  partyIDLDP = A7 == 1,
  partyIDDPJ = A7 == 2,
  partyIDdbl = ifelse(PREF == 20, partyIDDPJ, partyIDLDP),
  partyIDsgl = ifelse(PREF == 20, partyIDLDP, partyIDDPJ)
)

## Exclude Miyagi Prefecture because two LDP candidates were
##  closely (r > 0.9) competing for the last seat (non-discriminating equilibrium)
## Keep the pref*/vote*/party* variables (matches() ignores case by default,
##  so PREF is kept as well) plus the covariates, then drop every column whose
##  name contains LDP or DPJ except the two party-ID indicators, which are
##  selected again explicitly.
## The drop pattern is written without a leading "+": a quantifier with
##  nothing to repeat is only tolerated by R's default regex engine and is
##  rejected by PCRE, so "(LDP|DPJ)" is the portable spelling of the same match.
jes3 <- data %>%
        filter(PREF != 4) %>%
        dplyr::select(matches("^(pref|vote|party)+"), age, gender, university) %>%
        dplyr::select(-matches("(LDP|DPJ)"), age, gender, university, partyIDLDP, partyIDDPJ) %>%
        mutate(jes = 3)


## Read JES4 data (two files, suffixed _5 and _6 below)
data5 <- read.csv("data/jes4_5.csv")
data6 <- read.csv("data/jes4_6.csv")

## Replace the codes 88, 99, 888 and 999 with NA in both files
## NOTE(review): the replacement is applied to every column, including any
##  identifier columns -- confirm that no column holds these numbers as
##  genuine values
for (missing_code in c(88, 99, 888, 999)) {
  data5[data5 == missing_code] <- NA
  data6[data6 == missing_code] <- NA
}

## Remember the original column names, then tag every column with the
##  suffix of the file it came from so the two files can be joined
colnames5 <- colnames(data5)
colnames6 <- colnames(data6)
names(data5) <- str_c(colnames5, "_5")
names(data6) <- str_c(colnames6, "_6")

## Read candidate code
## Collapse the code sheet to one row per prefecture (Pref).
## For each candidate column, the inner product of the "entry is not missing"
##  indicator with Code adds up the Code values of the rows that have an
##  entry in that column (0 when no row has one).
code5 <- read.csv("data/Originalcodejes4_5.csv")
code5 <- group_by(code5, Pref)
code5 <- summarise(code5,
                   LDP = as.vector((!is.na(LDP)) %*% Code),
                   DPJ_1 = as.vector((!is.na(DPJ_1)) %*% Code),
                   DPJ_2 = as.vector((!is.na(DPJ_2)) %*% Code))
code5 <- ungroup(code5)

## Merge and make variables
## Naming scheme of the constructed variables:
##  dbl:   the party that runs two candidates in the district
##  sgl:   the party that runs a single candidate in the district
##  pref:  preference
##  vote:  vote in the election
##  dblc1: the candidate of the two-candidate party with more votes
##         than the colleague
##  dblc2: the candidate of the two-candidate party with fewer votes
##         than the colleague
##  sglc:  the candidate of the single-candidate party

## Join the two JES4 files on the respondent ID, attach the prefecture-level
##  candidate codes, and drop respondents whose prefecture has no LDP entry
## NOTE(review): dplyr joins match NA keys to each other by default; if the
##  missing-value recode blanked any ID, those rows would be cross-matched --
##  confirm that the IDs are unaffected
jes4_merged <- left_join(data5, data6, by = c("ID_5" = "ID_6"))
jes4_merged <- left_join(jes4_merged, code5,
                         by = c("PREFECTURE_SAMPLE_5" = "Pref"))
jes4_merged <- filter(jes4_merged, !is.na(LDP))

## The six candidate slots: Q1_<rank>_1_5 is compared with the candidate
##  code, Q1_<rank>_3_5 is the value that is carried over
jes4_rank <- c("1st", "2nd", "3rd", "4th", "5th", "6th")
jes4_slot_code <- paste0("Q1_", jes4_rank, "_1_5")
jes4_slot_pref <- paste0("Q1_", jes4_rank, "_3_5")

## For each row, return the Q1_<rank>_3_5 value of the first slot whose
##  Q1_<rank>_1_5 equals `candidate`, and NA when no slot matches.
## The ladder is built from the last slot backwards, so "1st" has priority.
## NOTE(review): as with a nested ifelse(), a missing slot code yields NA for
##  that row and later slots are not consulted -- confirm this is intended
jes4_candidate_pref <- function(survey, candidate) {
  pref <- NA
  for (k in rev(seq_along(jes4_slot_code))) {
    pref <- ifelse(survey[[jes4_slot_code[k]]] == candidate,
                   survey[[jes4_slot_pref[k]]],
                   pref)
  }
  pref
}

## Here the DPJ columns always feed the "dbl" variables and the LDP column
##  feeds the "sgl" variables
data <- mutate(
  jes4_merged,
  ## Covariates
  age = F2_AGE_5,
  gender = F1_5,
  university = F4_5 == 4,
  ## Candidate-level preferences
  prefdblc1 = jes4_candidate_pref(jes4_merged, DPJ_1),
  prefdblc2 = jes4_candidate_pref(jes4_merged, DPJ_2),
  prefsglc = jes4_candidate_pref(jes4_merged, LDP),
  ## Vote indicators: does Q1s2_KOUHOSYA_6 equal the candidate's code?
  votedblc1 = Q1s2_KOUHOSYA_6 == DPJ_1,
  votedblc2 = Q1s2_KOUHOSYA_6 == DPJ_2,
  votesglc = Q1s2_KOUHOSYA_6 == LDP,
  ## Party-level preferences
  prefdblparty = Q6_11_5,
  prefsglparty = Q6_10_5,
  ## Party identification
  partyIDdbl = Q8_5 == 2,
  partyIDsgl = Q8_5 == 1,
  partyIDLDP = Q8_5 == 1,
  partyIDDPJ = Q8_5 == 2
)


## Exclude Gifu Prefecture because two DPJ candidates were
##  closely (r > 0.9) competing for the last seat (non-discriminating equilibrium)
jes4 <- filter(data, PREFECTURE_SAMPLE_5 != 21)
## Keep the pref*/vote*/party* variables and the covariates; matches() ignores
##  case by default, so the two PREFECTURE_SAMPLE columns are picked up too
jes4 <- dplyr::select(jes4,
                      matches("^(pref|vote|party)+"), age, gender, university)
## Keep the _5 prefecture column under the name PREF and drop the _6 copy
jes4 <- rename(jes4, PREF = PREFECTURE_SAMPLE_5)
jes4 <- dplyr::select(jes4, -PREFECTURE_SAMPLE_6)
## Mark the rows as coming from JES4
jes4 <- mutate(jes4, jes = 4)


## Combine JES3 and JES4 data
## Keep only respondents who voted for exactly one of the two candidates of
##  the double-candidate party (rows where the sum is NA are dropped as well)
data <- filter(rbind(jes3, jes4), votedblc1 + votedblc2 == 1)


## Estimation ====================

## Main data for the analysis: "data"
## See the codebook "kickout_codebook.txt" for details
head(data)

## Kick-out voting theory expects that a stronger preference for the candidate
##  of the single-candidate party (prefsglc) reduces the incentive to vote
##  for the second-ranked candidate of the double-candidate party

## Main models
### Prefecture fixed effects
result_c1 <- summary_j(
  formula = votedblc2 ~ prefsglc + prefdblc1 + prefdblc2 + prefdblparty +
    factor(PREF),
  data = data,
  type = "c",
  fe = 1
)

### Prefecture and year fixed effects
result_c2 <- summary_j(
  formula = votedblc2 ~ prefsglc + prefdblc1 + prefdblc2 + prefdblparty +
    factor(PREF) + factor(jes),
  data = data,
  type = "c",
  fe = 2
)

## List the prefectures present in each survey
unique(data$PREF[data$jes == 3])
unique(data$PREF[data$jes == 4])
## Only PREF20 exists in both datasets (thus, PREF + jes equals PREF * jes)

## Main models with covariates
### Prefecture fixed effects
result_c1c <- summary_j(
  formula = votedblc2 ~ prefsglc + prefdblc1 + prefdblc2 + prefdblparty +
    factor(PREF) +
    age + gender + university + partyIDLDP + partyIDDPJ,
  data = data,
  type = "c",
  fe = 1
)

### Prefecture and year fixed effects
result_c2c <- summary_j(
  formula = votedblc2 ~ prefsglc + prefdblc1 + prefdblc2 + prefdblparty +
    factor(PREF) + factor(jes) +
    age + gender + university + partyIDLDP + partyIDDPJ,
  data = data,
  type = "c",
  fe = 2
)


## Robustness checks: the preference for the single-candidate party
##  (prefsglparty) replaces the preference for its candidate
### Prefecture fixed effects
result_p1a <- summary_j(
  formula = votedblc2 ~ prefsglparty + prefdblc1 + prefdblc2 + prefdblparty +
    factor(PREF),
  data = data,
  type = "pa",
  fe = 1
)

### Prefecture and year fixed effects
result_p2a <- summary_j(
  formula = votedblc2 ~ prefsglparty + prefdblc1 + prefdblc2 + prefdblparty +
    factor(PREF) + factor(jes),
  data = data,
  type = "pa",
  fe = 2
)

## The same robustness checks with covariates
## NOTE(review): these two models use partyIDdbl as the party-ID covariate,
##  whereas the other covariate models in this script use
##  partyIDLDP + partyIDDPJ -- confirm this is intended
### Prefecture fixed effects
result_p1ac <- summary_j(
  formula = votedblc2 ~ prefsglparty + prefdblc1 + prefdblc2 + prefdblparty +
    factor(PREF) +
    age + gender + university + partyIDdbl,
  data = data,
  type = "pa",
  fe = 1
)

### Prefecture and year fixed effects
result_p2ac <- summary_j(
  formula = votedblc2 ~ prefsglparty + prefdblc1 + prefdblc2 + prefdblparty +
    factor(PREF) + factor(jes) +
    age + gender + university + partyIDdbl,
  data = data,
  type = "pa",
  fe = 2
)

## Robustness checks using only the party-level preferences
### Prefecture fixed effects
result_p1b <- summary_j(
  formula = votedblc2 ~ prefsglparty + prefdblparty + factor(PREF),
  data = data,
  type = "pb",
  fe = 1
)

### Prefecture and year fixed effects
result_p2b <- summary_j(
  formula = votedblc2 ~ prefsglparty + prefdblparty + factor(PREF) +
    factor(jes),
  data = data,
  type = "pb",
  fe = 2
)

## The same robustness checks with covariates
### Prefecture fixed effects
result_p1bc <- summary_j(
  formula = votedblc2 ~ prefsglparty + prefdblparty + factor(PREF) +
    age + gender + university + partyIDLDP + partyIDDPJ,
  data = data,
  type = "pb",
  fe = 1
)

### Prefecture and year fixed effects
result_p2bc <- summary_j(
  formula = votedblc2 ~ prefsglparty + prefdblparty + factor(PREF) +
    factor(jes) +
    age + gender + university + partyIDLDP + partyIDDPJ,
  data = data,
  type = "pb",
  fe = 2
)


## Visualize the average marginal effects
### Prefecture fixed effects
## Figure A.1
## The seed is set once here; ame() and amedif() below are both called with
##  B = 10000, so the statement order must be kept for reproducible output
set.seed(20201234)
## Probit of voting for the weak candidate, passed to ame() with prefsglc
##  evaluated on the grid 0, 2, ..., 100
res <- glm(votedblc2 ~ prefsglc + prefdblc1 + prefdblc2 + prefdblparty +
                       factor(PREF), 
           family = binomial(probit), x = TRUE, data = data) %>%
       ame(object = ., treat = seq(0, 100, by = 2), B = 10000, 
            xlab = "Pref. for the candidate of the single-candidate party            ",
            ylab = "Prob. of voting for the weak candidate\nof the double-candidate party",
            ylab2 = "Number of respondents",
            y2scale = 3,
            histobreaks = seq(-2.5, 102.5, by = 5))
res$figure
## Hand the figure to ggsave() explicitly instead of relying on last_plot(),
##  so the saved file does not depend on which plot was created last
ggsave("figures/japan_ame.pdf", plot = res$figure,
       width = 18, height = 18, units = "cm")
## First and third quartiles of prefsglc among respondents with no missing
##  value in any of the four preference variables
q13 <- subset(data, is.na(prefsglc) + is.na(prefdblc1) + 
                    is.na(prefdblc2) + is.na(prefdblparty) == 0)$prefsglc %>%
       quantile(., probs = c(0.25, 0.75))
## Evaluate amedif() at those two quartiles
## NOTE(review): this model has no factor(PREF) term, unlike the model used
##  for Figure A.1 above -- confirm this is intended
res <- glm(votedblc2 ~ prefsglc + prefdblc1 + prefdblc2 + prefdblparty, 
           family = binomial(probit), x = TRUE, data = data) %>%
       amedif(object = ., treat = q13[1:2], B = 10000)
res


## LaTeX table
## Table A.1
## Row labels: eight coefficient rows (label plus continuation line for each
##  of four regressors) followed by four rows of model information
rnamec <- c("Pref.~for the candidate",
            "~~of the single-candidate party",
            "Pref.~for the strong candidate",
            "~~of the double-candidate party",
            "Pref.~for the weak candidate",
            "~~of the double-candidate party",
            "\\multirow{2}{*}{Pref.~for the double-candidate party}",
            "",
            "District fixed effects",
            "Election fixed effects",
            "Covariates",
            "Number of observations")

## One column per model, separated by empty spacer columns.
## Elements 1-10 of each result fill the first ten rows, the "Covariates" row
##  is NA or a check mark, and element 11 fills the last row.
resultc <- data.frame(rnamec,
                      NA,
                      c(result_c1[1:10], NA, result_c1[11]),
                      NA,
                      c(result_c1c[1:10], "$\\checkmark$", result_c1c[11]),
                      NA,
                      c(result_c2[1:10], NA, result_c2[11]),
                      NA,
                      c(result_c2c[1:10], "$\\checkmark$", result_c2c[11]))

clabelsc <- c("", "", "Model 1", "", "Model 2", "", "Model 3", "", "Model 4")
captionc <- "Kicking out the Candidate of the Rival Party in Japan"
notec <- 
"\\parbox{0.92\\textwidth}
{Notes: Coefficients and standard errors are multiplied by 100. 
The sample consists of voters who vote for either of the candidates of the double-candidate party.
Covariates include respondents' age, gender, education, and party ID.
Standard errors in parentheses.}"

## Write the table to tables/japanc.tex.
## The call is wrapped in invisible(): an unassigned latex() result is
##  auto-printed at top level, and the print method for latex objects tries to
##  typeset and preview the file, which fails or stalls on machines without a
##  LaTeX installation or viewer. The .tex file is written either way.
invisible(latex(object = resultc,
                title = "",
                file = "tables/japanc.tex",
                label = "tb_japanc",
                caption = captionc,
                insert.bottom = notec,
                first.hline.double = FALSE,
                rowname = NULL,
                colheads = clabelsc,
                col.just = c("l", rep("c", ncol(resultc) - 1)),
                n.rgroup = c(8, 4),
                longtable = FALSE,
                center = "centering"))


## Table A.2
## Row labels: eight coefficient rows (label plus continuation line for each
##  of four regressors) followed by four rows of model information
rnamepa <- c("\\multirow{2}{*}{Pref.~for the single-candidate party}",
             "",
             "Pref.~for the strong candidate",
             "~~of the double-candidate party",
             "Pref.~for the weak candidate",
             "~~of the double-candidate party",
             "\\multirow{2}{*}{Pref.~for the double-candidate party}",
             "",
             "District fixed effects",
             "Election fixed effects",
             "Covariates",
             "Number of observations")

## One column per model, separated by empty spacer columns.
## Elements 1-10 of each result fill the first ten rows, the "Covariates" row
##  is NA or a check mark, and element 11 fills the last row.
resultpa <- data.frame(rnamepa,
                       NA,
                       c(result_p1a[1:10], NA, result_p1a[11]),
                       NA,
                       c(result_p1ac[1:10], "$\\checkmark$", result_p1ac[11]),
                       NA,
                       c(result_p2a[1:10], NA, result_p2a[11]),
                       NA,
                       c(result_p2ac[1:10], "$\\checkmark$", result_p2ac[11]))

clabelspa <- c("", "", "Model 5", "", "Model 6", "", "Model 7", "", "Model 8")
captionpa <- "Kicking out the Candidate of the Rival Party in Japan: Robustness Checks 1"
notepa <- 
"\\parbox{0.92\\textwidth}
{Notes: Coefficients and standard errors are multiplied by 100. 
The sample consists of voters who vote for either of the candidates of the double-candidate party.
Covariates include respondents' age, gender, education, and party ID.
Standard errors in parentheses.}"

## Write the table to tables/japanpa.tex.
## The call is wrapped in invisible(): an unassigned latex() result is
##  auto-printed at top level, and the print method for latex objects tries to
##  typeset and preview the file, which fails or stalls on machines without a
##  LaTeX installation or viewer. The .tex file is written either way.
invisible(latex(object = resultpa,
                title = "",
                file = "tables/japanpa.tex",
                label = "tb_japanpa",
                caption = captionpa,
                insert.bottom = notepa,
                first.hline.double = FALSE,
                rowname = NULL,
                colheads = clabelspa,
                col.just = c("l", rep("c", ncol(resultpa) - 1)),
                n.rgroup = c(8, 4),
                longtable = FALSE,
                center = "centering"))


## Table A.3
## Row labels: four coefficient rows (label plus continuation line for each
##  of two regressors) followed by four rows of model information
rnamepb <- c("\\multirow{2}{*}{Pref.~for the single-candidate party}",
             "",
             "\\multirow{2}{*}{Pref.~for the double-candidate party}",
             "",
             "District fixed effects",
             "Election fixed effects",
             "Covariates",
             "Number of observations")

## One column per model, separated by empty spacer columns.
## Elements 1-6 of each result fill the first six rows, the "Covariates" row
##  is NA or a check mark, and element 7 fills the last row.
resultpb <- data.frame(rnamepb,
                       NA,
                       c(result_p1b[1:6], NA, result_p1b[7]),
                       NA,
                       c(result_p1bc[1:6], "$\\checkmark$", result_p1bc[7]),
                       NA,
                       c(result_p2b[1:6], NA, result_p2b[7]),
                       NA,
                       c(result_p2bc[1:6], "$\\checkmark$", result_p2bc[7]))

clabelspb <- c("", "", "Model 9", "", "Model 10", "", "Model 11", "", "Model 12")
captionpb <- "Kicking out the Candidate of the Rival Party in Japan: Robustness Checks 2"
notepb <- 
"\\parbox{0.95\\textwidth}
{Notes: Coefficients and standard errors are multiplied by 100. 
The sample consists of voters who vote for either of the candidates of the double-candidate party.
Covariates include respondents' age, gender, education, and party ID.
Standard errors in parentheses.}"

## Write the table to tables/japanpb.tex.
## The call is wrapped in invisible(): an unassigned latex() result is
##  auto-printed at top level, and the print method for latex objects tries to
##  typeset and preview the file, which fails or stalls on machines without a
##  LaTeX installation or viewer. The .tex file is written either way.
invisible(latex(object = resultpb,
                title = "",
                file = "tables/japanpb.tex",
                label = "tb_japanpb",
                caption = captionpb,
                insert.bottom = notepb,
                first.hline.double = FALSE,
                rowname = NULL,
                colheads = clabelspb,
                col.just = c("l", rep("c", ncol(resultpb) - 1)),
                n.rgroup = c(4, 4),
                longtable = FALSE,
                center = "centering"))

